* CLEANING AND VARIABLE CREATION: FIRST FOLLOW-UP DATA

*********************************************************************************

* FIRST FOLLOW-UP

* Load the raw first follow-up survey responses
use "${data}/r1_followup_raw.dta", clear

* Give the identifying variables readable names and move them to the front
rename questionnaire    hh_id
rename a14a             gpname
rename a14_b__gp_code   gp_code
rename a14_d__tok_name  tokname
rename a14_c__tok_code  grp_code
rename a_15             headhhname
order hh_id gpname-headhhname, first

* Enumerator IDs recorded for each of the three visits (a61, a62, a63)
forvalues visit = 1/3 {
	rename a6`visit' IDvisit`visit'
	label variable IDvisit`visit' "Enumerator ID for Visit `visit'"
}

* Enumerator at the completed visit, and the supervisor
rename person_id IDcompleted
label variable IDcompleted "Enumerator ID at completed visit"
rename person_ie IDsupervisor
label variable IDsupervisor "Supervisor ID"

* Place every ID* variable directly after the household-head name
order ID*, after(headhhname)

* Remove the duplicate record of household 1852 (the copy filed under GP "Lili")
drop if hh_id == 1852 & gpname == "Lili"

* Address households missing/not surveyed in first follow-up
* hhmissing picks up a visit-outcome code from a7_1, a7_2 and a7_3. Codes are applied
* in list order and, within a code, visits in visit order, so a later code (or a later
* visit) overwrites an earlier one. A second pass then lets code 1 override all others.
generate hhmissing = .
foreach code of numlist 1 2 5 7 9 11 12 {
	foreach visit of varlist a7_1 a7_2 a7_3 {
		replace hhmissing = `visit' if `visit' == `code'
	}
}
foreach visit of varlist a7_1 a7_2 a7_3 {
	replace hhmissing = `visit' if `visit' == 1
}

* Household-specific overrides of the outcome code
replace hhmissing = 2 if inlist(hh_id, 1373, 1556)
replace hhmissing = 1 if inlist(hh_id, 1395, 1449, 1372, 1381)
label values hhmissing A7_1
label variable hhmissing "Whether household was surveyed, refused, missing, etc."

* Lost-to-study flag: any outcome other than code 1 (including a missing outcome),
* plus households 1372 and 1381, which are flagged even though they carry code 1.
generate lostin2013 = (hhmissing != 1) | inlist(hh_id, 1372, 1381)
label variable lostin2013 "Household was lost to study in 2013, but had been previously interviewed"

* Fix GP codes
* gp_code and grp_code arrive as strings; repair the individual bad entries first so
* that destring can convert both variables to numeric.
replace gp_code = "80" if gp_code == "3" & hh_id == 1913
destring gp_code, replace
replace grp_code = "1" if grp_code == "Mansa" & hh_id == 1405
replace grp_code = "0" if grp_code == "Naul" & hh_id == 1353
destring grp_code, replace

* Correct inconsistencies with GP codes (relative to baseline data)
* Households reassigned to GP 77, grouped by the grp_code they must currently carry
replace gp_code = 77 if grp_code == 4 & ///
	inlist(hh_id, 1590, 1591, 1592, 1593, 1595, 1596, 1599, 1600, 1601, 1603, 1604, 1606, 1608, 1609)
replace gp_code = 77 if grp_code == 1 & ///
	inlist(hh_id, 1594, 1605, 1610, 1611, 1612, 1615, 1616, 1620, 1621, 1625, 1626, 1627)
replace gp_code = 77 if grp_code == 5 & ///
	inlist(hh_id, 1597, 1598, 1602, 1607)

* Remaining single-household GP code corrections
replace gp_code = 84 if hh_id == 1259 & grp_code == 6
replace gp_code = 54 if hh_id == 1500 & grp_code == 5

* Single-household grp_code corrections, keyed on the household's GP code
replace grp_code = 3 if hh_id == 1695 & gp_code == 51
replace grp_code = 1 if hh_id == 1848 & gp_code == 71
replace grp_code = 0 if hh_id == 2045 & gp_code == 56

* General cleaning and variable creation

* Roster variables carry a two-digit member suffix, 01 through 15. For every member
* slot, copy the raw roster items into descriptively named variables.
forvalues slot = 1/15 {

	* Zero-padded suffix matching the raw variable names (01, 02, ..., 15)
	local m : display %02.0f `slot'

	* Member entry (for household size): trimmed copy of c2
	clonevar hh_member_`m' = c2_`m'
	replace hh_member_`m' = trim(hh_member_`m')
	tostring hh_member_`m', replace

	* Age in years / months / days (for children under five and age of household head)
	clonevar age_year_`m' = c5_yr_`m'
	clonevar age_month_`m' = c5_mm_`m'
	clonevar age_day_`m' = c5_dd_`m'

	* Household-head indicator: 1 when c6 equals 1, otherwise 0 (also 0 when c6 is missing)
	generate household_head_`m' = (c6_`m' == 1)

	* Sex (for female head of household)
	clonevar sex_`m' = c4_`m'

	* Cooking: copy c8, convert to numeric if needed, and fold every code above 2 into 2
	clonevar cooks_`m' = c8_`m'
	destring cooks_`m', replace
	replace cooks_`m' = 2 if cooks_`m' > 2 & cooks_`m' < .

	* Primary-cook indicator: 1 when the cooking code equals 1, otherwise 0
	generate primary_cook_`m' = (cooks_`m' == 1)

	* Years of education and in/attended school indicator
	clonevar education_years_`m' = c12_`m'
	clonevar school_indicator_`m' = c11_`m'

	* Cough-cold indicator
	clonevar cough_or_cold_`m' = d2_`m'
}

* Below poverty line (BPL) indicator
* 1 when a22c == 1, 0 for any other non-missing a22c response, missing when a22c is missing.
* The zero-fill has to test a22c for missingness: below_pov_line is non-missing only where
* a22c == 1, so testing !mi(below_pov_line) together with a22c != 1 can never be true and
* would leave every non-BPL household missing instead of 0.
gen below_pov_line = 1 if a22c == 1
replace below_pov_line = 0 if a22c != 1 & !mi(a22c)
label var below_pov_line "Household reports they are below poverty line (BPL)"

* Stove-ownership and stove-use variables
* The stove-type suffixes of the e-section items are shifted onto a different set of
* suffixes. The pairs below are applied strictly in this order (highest raw suffix
* first) so that a target name is always free before it is reused.
local stove_items 1 2 2bn 6 8 9an_1 9an_2 9bn
local raw_suffix 11 10 09 08 07 06 05 04 03 02
local new_suffix 12 11bn 11an 09 08 07bn 07an 06 04 03
foreach item of local stove_items {
	forvalues k = 1/10 {
		local from : word `k' of `raw_suffix'
		local to : word `k' of `new_suffix'
		rename e`item'_`from' e`item'_`to'
	}
}

* Recode the value 7 in the electric-stove ownership item to 1
replace e1_07an = 1 if e1_07an == 7

* Ownership labels by stove type (after the suffix shift)
label variable e1_12 "=1 if hh owns other stove"
label variable e1_11bn "=1 if hh owns other ICS"
label variable e1_11an "=1 if hh owns greenway"
label variable e1_09 "=1 if hh owns sagarh"
label variable e1_08 "=1 if hh owns biogas stove"
label variable e1_07bn "=1 if hh owns g-coil stove"
label variable e1_07an "=1 if hh owns electric stove"
label variable e1_06 "=1 if hh owns LPG stove"
label variable e1_04 "=1 if hh owns kerosene stove"
label variable e1_03 "=1 if hh owns three stone fire stove"
label variable e1_01 "=1 if hh owns mitti ka chulha/anjeti stove"

* Per stove type: ownership (e1), number owned (e2), whether used (e6), and
* usage time in hours (e9an_1) and minutes (e9an_2)
foreach stove in 01 03 04 06 07an 07bn 08 09 11an 11bn 12 {
	clonevar stove_own_`stove' = e1_`stove'
	clonevar number_stove_own_`stove' = e2_`stove'
	clonevar stove_used_`stove' = e6_`stove'
	clonevar stove_use_hours_`stove' = e9an_1_`stove'
	clonevar stove_use_minutes_`stove' = e9an_2_`stove'
}

* Collapse every non-missing stove-used code above 1 into 1
foreach used of varlist stove_used_* {
	replace `used' = 1 if `used' > 1 & `used' < .
}

* Fuel-use indicator, fuel-use regularity, fuel-collection time, and fuel-expenditure variables
label variable e18_01 "Use fuel: fuelwood"
label variable e18_02 "Use fuel: crop residue"
label variable e18_03 "Use fuel: leaves"
label variable e18_04 "Use fuel: dung"
label variable e18_05 "Use fuel: biomas pellets"
label variable e18_06 "Use fuel: kerosene"
label variable e18_07 "Use fuel: LPG"
label variable e18_08 "Use fuel: electricity"
label variable e18_09 "Use fuel: biogas"
label variable e18_10 "Use fuel: other"

* Per fuel type (01-10): use and regularity, collection time and its unit,
* expenditure and its unit
foreach fuel in 01 02 03 04 05 06 07 08 09 10 {
	clonevar fuel_use_`fuel' = e18_`fuel'
	clonevar fuel_use_regularity_`fuel' = e20_`fuel'
	clonevar fuel_collection_time_`fuel' = e24n_1_`fuel'
	clonevar fuel_collection_units_`fuel' = e24n_2_`fuel'
	clonevar fuel_expenditure_`fuel' = e21n_1_`fuel'
	clonevar fuel_expenditure_units_`fuel' = e21n_2_`fuel'
}

* Specifying fuel-collection time in units per the follow-up survey instrument
* Fuels 06 and 07 are treated as hours per month (divide by 30); every other fuel is
* treated as hours per week (divide by 7). The result is minutes per day.
foreach fuel in 01 02 03 04 05 06 07 08 09 10 {
	local days_in_period = cond(inlist("`fuel'", "06", "07"), 30, 7)
	generate fuel_collection_minutes_`fuel' = (fuel_collection_time_`fuel' / `days_in_period') * 60
}

* NB: Survey instrument restricts reported units, as specified below.
* Unit code 3 for fuels 06 and 07, unit code 2 for fuels 01-05 and 09, wherever a
* collection time was reported. Fuels 08 and 10 keep their recorded unit.
foreach fuel in 01 02 03 04 05 06 07 09 {
	local unit_code = cond(inlist("`fuel'", "06", "07"), 3, 2)
	replace fuel_collection_units_`fuel' = `unit_code' if !mi(fuel_collection_time_`fuel')
}

* Clean stove and fuel awareness
clonevar heard_stove_lesssmoke = b4
clonevar heard_fuel_lesssmoke = b5

* Smoke safety perception
clonevar smoke_safety_perception = b11

* Self-reported fuel use
* Copy the reported quantity (e25n_1) for each fuel type 01-10. Written with foreach
* rather than the out-of-date -for any ...:- prefix command.
foreach fuel in 01 02 03 04 05 06 07 08 09 10 {
	gen fueluse_`fuel' = e25n_1_`fuel'
}

* Fuelwood (fuel 01): put the reported quantity on a per-day basis using the
* reporting-period code in e25n_3_01
replace fueluse_01 = fueluse_01 / 7 if e25n_3_01 == 2  // converting weekly to daily
replace fueluse_01 = fueluse_01 / 30 if e25n_3_01 == 3 // converting monthly to daily
label var fueluse_01 "Fuelwood usage per day, kg - self reported"
ren fueluse_01 fuelwood_used_weight_reported

* Weighed fuelwood use
* Inspect the two weighed quantities, then take their difference as the amount used
tabulate e25bn_1_01
tabulate e25bn_2_01
generate used_fwd = e25bn_1_01 - e25bn_2_01

* Adjustments for extra fuel used, by e25cn_01 code:
*   1 -> x1.25 (less than an extra quarter; original run noted 49 changes)
*   2 -> x1.50 (extra of less than half the original amount)
*   3 -> x1.75 (extra of more than half the original amount)
*   4 -> x2.00 (twice the original amount)
forvalues extra = 1/4 {
	replace used_fwd = used_fwd * (1 + 0.25 * `extra') if e25cn_01 == `extra'
}
tabulate used_fwd

rename used_fwd fuelwood_used_weight
label variable fuelwood_used_weight "Amount of firewood used during the monitoring period, kgs - WEIGHED"

* Saving money possible
* 0 if any of g23_1-g23_3 equals 0; 1 if any of them equals 1 (1 takes precedence
* because it is applied last).
gen save_possible = .
replace save_possible = 0 if inlist(0, g23_1, g23_2, g23_3)
replace save_possible = 1 if inlist(1, g23_1, g23_2, g23_3)
tab save_possible
label var save_possible "Possible for household to save money at MFI or other bank, etc"

* Households still missing that are not flagged as lost are set to 0 (the original
* run noted 10 such households). The flag is referenced by its full name,
* lostin2013, rather than the abbreviation "lostin", so the line does not depend on
* variable-name abbreviation being enabled or unambiguous.
replace save_possible = 0 if save_possible == . & lostin2013 != 1

* Credit access behaviour (Took loan in last year)
generate credit = .

* Responses coded -9 are mapped to missing. credit starts out missing, so these
* statements change no values at this point.
forvalues source = 1/6 {
	replace credit = . if g19_`source' == -9
}

* 0 if any of g19_1-g19_6 equals 0; 1 if any of them equals 1 (1 is applied last and wins)
replace credit = 0 if inlist(0, g19_1, g19_2, g19_3, g19_4, g19_5, g19_6)
replace credit = 1 if inlist(1, g19_1, g19_2, g19_3, g19_4, g19_5, g19_6)
tabulate credit
label variable credit "Household has taken loan in past year"

* Households still missing that are not flagged as lost are set to 0
replace credit = 0 if credit == . & lostin2013 != 1

* Relative wealth perception index
* Keep g12 only when it lies between 0 and 6 inclusive; anything else becomes missing
generate relative_wealth = g12 if inrange(g12, 0, 6)
label variable relative_wealth "Self assessment of relative wealth on 1-6 scale(ladder question)"

* Hours reported of electricity
* g10 == 1 -> 24 hours; g10 == 2 -> the reported hours in g10_1; g10 == 3 -> 0 hours;
* any other g10 value -> missing
generate electricity_hours = cond(g10 == 1, 24, cond(g10 == 2, g10_1, cond(g10 == 3, 0, .)))

* Cap values above 24 at 24 (system-missing values are left alone)
replace electricity_hours = 24 if electricity_hours > 24 & electricity_hours != .
label variable electricity_hours "Hours households have electricity per day"

* Merging intervention stove purchase data (from supervisor sheets)
* Only households present in both files are kept (keep(match)), and no merge
* indicator is left behind (nogenerate).
* The path has an explicit "/" after ${data}, matching the -use- call at the top of
* the file, so it resolves whether or not the global ends in a separator.
merge m:1 hh_id using "${data}/r1_followup_stovepurchase.dta", ///
	keepusing(BuyGway BuyGcoil BuyGcoilGway) keep(match) nogenerate

* Fixing errors in intervention stove purchase data
* Four households are recorded as having bought neither a Greenway nor the
* G-coil + Greenway combination
replace BuyGway = 0 if inlist(hh_id, 1367, 1478, 1481, 1490)
replace BuyGcoilGway = 0 if inlist(hh_id, 1367, 1478, 1481, 1490)

* GP 64: recorded Greenway purchases are reset to 0
replace BuyGway = 0 if gp_code == 64 & BuyGway == 1 // Never purchased

* Household 1162 (applied after the GP 64 reset, so it takes precedence)
replace BuyGway = 1 if hh_id == 1162 // Bought a Gway
replace BuyGcoil = 0 if hh_id == 1162 // Did not buy a Gcoil

* Reported maintenance issues with intervention stoves

* Greenway problem items
label variable f16n   "Reported no problems with greenway"
label variable f16n_b "Reported chopping wood inconvenient with greenway"
label variable f16n_c "Reported handles broken problem with greenway"
label variable f16n_d "Reported discoloration problem with greenway"
label variable f16n_e "Reported hard to light problem with greenway"
label variable f16n_f "Reported smoke problem with greenway"
label variable f16n_g "Reported 'other' problem with greenway"

* Descriptive renames for the Greenway items (left disabled; later code uses the raw names)
// ren f16n gway_prob_none
// ren f16n_b gway_prob_chopping
// ren f16n_c gway_prob_handles
// ren f16n_d gway_prob_discolor
// ren f16n_e gway_prob_hard_light
// ren f16n_f gway_prob_smoke
// ren f16n_g gway_prob_other

* G-coil problem items
label variable f16n_2  "Reported no problems with g coil"
label variable f16n_2b "Reported shocks problem with g coil"
label variable f16n_2c "Reported coil problem with g coil"
label variable f16n_2d "Reported plug or wire problem with g coil"
label variable f16n_2e "Reported knob problem with g coil"
label variable f16n_2f "Reported stove not heating enough with g coil"
label variable f16n_2g "Reported light fluctuation/elec bill with g coil"
label variable f16n_2h "Reported 'other' problem with g coil"

* Descriptive renames for the G-coil items (left disabled; later code uses the raw names)
// ren f16n_2 gcoil_prob_none
// ren f16n_2b gcoil_prob_shocks
// ren f16n_2c gcoil_prob_coil
// ren f16n_2d gcoil_prob_plu_wire
// ren f16n_2e gcoil_prob_knob
// ren f16n_2f gcoil_prob_heat
// ren f16n_2g gcoil_prob_light_bill
// ren f16n_2h gcoil_prob_other

* Problem and maintenance indicators for the two intervention stoves
* For each outcome, any_<outcome> is the row mean of its item variables, and
* <outcome> is 1 when that mean is non-missing and non-zero, otherwise 0.

* Item variables behind each indicator
local items_problems_greenway    f16n_b f16n_c f16n_d f16n_e f16n_f f16n_g
local items_problems_gcoil       f16n_2b f16n_2c f16n_2d f16n_2e f16n_2f f16n_2g f16n_2h
local items_maintenance_greenway f17n_b f17n_c f17n_d f17n_e f17n_f f17n_g
local items_maintenance_gcoil    f17n_2b f17n_2c f17n_2d f17n_2e f17n_2f f17n_2g

* Problems with Greenway / G-Coil, then attempts to get maintenance for Greenway / G-Coil
foreach outcome in problems_greenway problems_gcoil maintenance_greenway maintenance_gcoil {
	egen any_`outcome' = rowmean(`items_`outcome'')
	generate `outcome' = (any_`outcome' < . & any_`outcome' != 0)
}

* Generate state dummy
* Every record loaded so far is assigned state code 5 ("UK"); the label also defines 9 ("UP")
gen state_code = 5 // UK
label define state_code 5 "UK" 9 "UP"
label values state_code state_code

* Merge treatment allocation variable
* The path has an explicit "/" after ${data}, matching the -use- call at the top of
* the file. The merge indicator is given its own name and dropped below, like the
* file's other merges, instead of leaving the default _merge in memory.
* NOTE(review): no keep() is specified, so households found only in the allocation
* file are appended here with state_code (and all survey variables) missing -
* confirm this is intended.
merge m:1 hh_id using "${data}/r1_followup_trmtallocation.dta", keepusing(control3) gen(_mergeTrt)

* treatment is the complement of control3: 1 when control3 == 0, 0 when control3 == 1,
* and missing otherwise
gen treatment = 1 if control3 == 0
replace treatment = 0 if control3 == 1
label var treatment "Household was selected to receive the ICS sales offer intervention"
drop control3 _mergeTrt

* Drop duplicates
* Report and tag repeated hh_id values, remove the 1852 / "Lili" record if present,
* then discard the temporary tag
duplicates report hh_id if state_code == 5
duplicates tag hh_id, g(hh_idDUP)
drop if hh_id == 1852 & gpname == "Lili"
drop hh_idDUP

* Merging rebate level allocations
* Both paths below have an explicit "/" after ${data}, matching the -use- call at the
* top of the file, so they resolve whether or not the global ends in a separator.
merge m:1 hh_id using "${data}/r1_followup_rbtallocation.dta", keepusing(Rebate maxlclass1 maxlclass2 maxlclass3) gen(_mergeReb)
label var Rebate "Assigned rebate level for all UK: 1=25, 2=200, 3=1/3 price"
drop _mergeReb

* Merging community-level survey
* Matched on gp_code; the merge indicator is stored in _mergeM1.
* NOTE(review): no keep() is specified, so GPs found only in the community file are
* appended as rows without household data - confirm this is intended.
merge m:1 gp_code using "${data}/r1_followup_commsurvey.dta", keepusing (docdist bus_min bankfac) gen(_mergeM1)

* Final cleaning
* Correct one misspelled tok name in GP 61
replace tokname = "Mongro" if gp_code == 61 & tokname == "Mogro"

* Additional households flagged as lost to the study
replace lostin2013 = 1 if inlist(hh_id, 1617, 1650, 1853, 1919, 1932, 1946, ///
	1992, 2001, 2008, 2009, 2052, 2102)

* Household-head name for household 1175
replace headhhname = "Pari Ram" if hh_id == 1175

* Visited by Chirag
* Start from f7n and fall back to f7n_2 wherever f7n is missing
clonevar chirag_visit = f7n
replace chirag_visit = f7n_2 if mi(chirag_visit) & !mi(f7n_2)
label variable chirag_visit "Did someone from Chirag come to this household to talk about purchasing ICS?"

* Generate surveyround variable (constant 1 in this file)
generate surveyround = 1
label variable surveyround "survey round; 0 = baseline, 1 = follow-up 1, 2 = follow-up 2"

* Order and rename variables

* Variables retained in the cleaned file, listed once and used for both keep and order
local final_vars ///
	hh_id gpname gp_code surveyround IDcompleted treatment lostin2013 hhmissing ///
	hh_member_* age_year_* age_month_* age_day_* household_head_* sex_* cooks_* primary_cook_* education_years_* ///
	school_indicator_* cough_or_cold_* below_pov_line stove_own_* number_stove_own_* stove_used_* stove_use_hours_* stove_use_minutes_* ///
	fuel_use_* fuelwood_used_weight fuelwood_used_weight_reported fuel_collection_minutes* fuel_expenditure_* problems_* maintenance_* heard_stove_* heard_fuel_* smoke_safety_* ///
	Rebate maxlclass1 maxlclass2 maxlclass3 BuyGway BuyGcoil BuyGcoilGway save_possible credit relative_wealth electricity_hours ///
	docdist bus_min bankfac chirag_visit

keep `final_vars'
order `final_vars'

* Prefix the stove and awareness variables with R1_
foreach v of varlist stove_own* number_stove_own* stove_used_* stove_use_* heard_stove* heard_fuel* {
	rename `v' R1_`v'
}

* SAVE: ROUND 1 ****************************************************************

save "${output}r1_followup_cleaned.dta", replace
